/*
 * TOP SECRET Copyright 2006-2015 Transsion.com All right reserved. This software is the confidential and proprietary
 * information of Transsion.com ("Confidential Information"). You shall not disclose such Confidential Information and
 * shall use it only in accordance with the terms of the license agreement you entered into with Transsion.com.
 */
package com.yunji.framework_template.biz.crawler.in;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.annotation.Resource;

import org.springframework.stereotype.Service;

import com.yunji.framework_template.biz.crawler.ContentImageHandler;
import com.yunji.framework_template.biz.crawler.NewsCrawler;
import com.yunji.framework_template.biz.crawler.SourceType;
import com.yunji.framework_template.common.enumeration.CountryCode;
import com.yunji.framework_template.common.enumeration.NewsType;
import com.yunji.framework_template.common.util.NumberUtil;

/**
 * News crawler definition for the indianexpress.com site: decides which links are worth
 * following as article pages, lists the entry pages to start from, and declares the country
 * code the crawled news is associated with. <br/>
 * ClassName: IndianexpressNewsCrawler <br/>
 * Date: 2018-12-18 15:31:24 <br/>
 *
 * @author fenglibin1982@163.com
 * @Blog http://blog.csdn.net/fenglibing
 */
@Service
public class IndianexpressNewsCrawler extends NewsCrawler {

    /** Registered domain of the site; links must point at this host or one of its sub-domains. */
    private static final String SITE_DOMAIN = "indianexpress.com";

    /**
     * Minimum length of {@code path.split("/")} for a detail page. The split of an absolute path
     * starts with an empty element, so 4 means "at least three path segments".
     */
    private static final int MIN_PATH_PARTS = 4;

    /** Minimum number of hyphen-separated parts for the last path segment to count as a slug. */
    private static final int MIN_SLUG_PARTS = 3;

    @Resource
    private IndianexpressNewsImageHandler indianexpressNewsImageHandler;

    /**
     * Decides whether {@code url} looks like a news detail page of this site.
     * <p>
     * A URL is accepted when all of the following hold:
     * <ul>
     * <li>it is non-blank and does not start with {@code mailto} or {@code javascript};</li>
     * <li>it parses as a {@link URI} whose host is {@code indianexpress.com} or a sub-domain of it
     * (compared case-insensitively);</li>
     * <li>its path has at least three segments and the last segment is either a slug made of three
     * or more hyphen-separated parts or is accepted by {@code NumberUtil.isNumber}.</li>
     * </ul>
     *
     * @param url the link to test, may be {@code null}
     * @return {@code true} if the link passes all checks above, {@code false} otherwise (including
     *         when the link cannot be parsed)
     */
    @Override
    public boolean isOkUrl(String url) {
        if (url == null || url.trim().length() == 0) {
            return false;
        }
        // Cheap pre-filter for pseudo links; done without toLowerCase() so the result does not
        // depend on the JVM default locale.
        if (startsWithIgnoreCase(url, "mailto") || startsWithIgnoreCase(url, "javascript")) {
            return false;
        }

        final URI uri;
        try {
            uri = new URI(url);
        } catch (URISyntaxException ignored) {
            // A link that is not a valid URI is simply not a candidate page; nothing to report.
            return false;
        }

        String host = uri.getHost();
        if (host == null || !isSiteHost(host)) {
            return false;
        }

        String path = uri.getPath();
        if (path == null) {
            return false;
        }
        String[] pathArr = path.split("/");
        if (pathArr.length < MIN_PATH_PARTS) {
            return false;
        }
        String lastSegment = pathArr[pathArr.length - 1];
        return lastSegment.split("-").length >= MIN_SLUG_PARTS || NumberUtil.isNumber(lastSegment);
    }

    /**
     * Returns the image handler used for the content of pages crawled from this site.
     *
     * @return the injected {@code IndianexpressNewsImageHandler}
     */
    @Override
    public ContentImageHandler getContentImageHandler() {
        return indianexpressNewsImageHandler;
    }

    /**
     * Builds the list of entry pages for this site, each paired with the news type it is tagged
     * with. A new mutable list is created on every call.
     * <p>
     * NOTE(review): many entries pair a section with a news type that does not match the section
     * name (for example {@code /section/sports/} is tagged {@code LIFESTYLE}); the types appear to
     * be assigned in a fixed rotation within each group. The pairs are kept exactly as they were -
     * confirm whether this is intentional.
     *
     * @return the entry pages in their declared order
     */
    @Override
    public List<SourceType> getSourceTypeList() {
        List<SourceType> sourceTypeList = new ArrayList<SourceType>();

        // Home page and top-level sections.
        addSource(sourceTypeList, "https://indianexpress.com", NewsType.HOT);
        addSource(sourceTypeList, "https://indianexpress.com/section/india/", NewsType.HOT);
        addSource(sourceTypeList, "https://indianexpress.com/section/world/", NewsType.EDUCATION);
        addSource(sourceTypeList, "https://indianexpress.com/section/cities/", NewsType.VIDEO);
        addSource(sourceTypeList, "https://indianexpress.com/section/opinion/", NewsType.IMAGE);
        addSource(sourceTypeList, "https://indianexpress.com/section/explained/", NewsType.SPORT);
        addSource(sourceTypeList, "https://indianexpress.com/section/sports/", NewsType.LIFESTYLE);
        addSource(sourceTypeList, "https://indianexpress.com/section/entertainment/", NewsType.FUN);
        addSource(sourceTypeList, "https://indianexpress.com/section/lifestyle/", NewsType.HOUSE);
        addSource(sourceTypeList, "https://indianexpress.com/section/technology/", NewsType.ASKANDANSWER);
        addSource(sourceTypeList, "https://indianexpress.com/parenting/", NewsType.HEALTH);
        addSource(sourceTypeList, "https://indianexpress.com/section/trending/", NewsType.MILITARY);
        addSource(sourceTypeList, "https://indianexpress.com/videos/", NewsType.CAR);
        addSource(sourceTypeList, "https://indianexpress.com/photos/", NewsType.TECH);
        addSource(sourceTypeList, "https://indianexpress.com/audio/", NewsType.FINANCE);

        // Technology sub-sections.
        addSource(sourceTypeList, "https://indianexpress.com/section/technology/mobile-tabs/", NewsType.TECH);
        addSource(sourceTypeList, "https://indianexpress.com/section/technology/tech-reviews/", NewsType.TECH);
        addSource(sourceTypeList, "https://indianexpress.com/section/technology/tech-news-technology/", NewsType.TECH);
        addSource(sourceTypeList, "https://indianexpress.com/section/technology/science/", NewsType.TECH);
        addSource(sourceTypeList, "https://indianexpress.com/section/technology/techook/", NewsType.TECH);
        addSource(sourceTypeList, "https://indianexpress.com/section/technology/social/", NewsType.TECH);

        // Entertainment sub-sections.
        addSource(sourceTypeList, "https://indianexpress.com/section/entertainment/bollywood/", NewsType.HOT);
        addSource(sourceTypeList, "https://indianexpress.com/section/entertainment/hollywood/", NewsType.EDUCATION);
        addSource(sourceTypeList, "https://indianexpress.com/section/entertainment/television/", NewsType.VIDEO);
        addSource(sourceTypeList, "https://indianexpress.com/section/entertainment/telugu/", NewsType.IMAGE);
        addSource(sourceTypeList, "https://indianexpress.com/section/entertainment/tamil/", NewsType.SPORT);
        addSource(sourceTypeList, "https://indianexpress.com/section/entertainment/malayalam/", NewsType.LIFESTYLE);
        addSource(sourceTypeList, "https://indianexpress.com/section/entertainment/regional/", NewsType.FUN);
        addSource(sourceTypeList, "https://indianexpress.com/section/entertainment/web-series/", NewsType.HOUSE);
        addSource(sourceTypeList, "https://indianexpress.com/section/entertainment/movie-review/", NewsType.ASKANDANSWER);

        // Cities sub-sections.
        addSource(sourceTypeList, "https://indianexpress.com/section/cities/delhi/", NewsType.HOT);
        addSource(sourceTypeList, "https://indianexpress.com/section/cities/mumbai/", NewsType.EDUCATION);
        addSource(sourceTypeList, "https://indianexpress.com/section/cities/kolkata/", NewsType.VIDEO);
        addSource(sourceTypeList, "https://indianexpress.com/section/cities/pune/", NewsType.IMAGE);
        addSource(sourceTypeList, "https://indianexpress.com/section/cities/chandigarh/", NewsType.SPORT);
        addSource(sourceTypeList, "https://indianexpress.com/section/cities/ahmedabad/", NewsType.LIFESTYLE);
        addSource(sourceTypeList, "https://indianexpress.com/section/cities/lucknow/", NewsType.FUN);
        addSource(sourceTypeList, "https://indianexpress.com/section/cities/jaipur/", NewsType.HOUSE);

        // Lifestyle sub-sections.
        addSource(sourceTypeList, "https://indianexpress.com/section/lifestyle/books/", NewsType.HOT);
        addSource(sourceTypeList, "https://indianexpress.com/section/lifestyle/fashion/", NewsType.EDUCATION);
        addSource(sourceTypeList, "https://indianexpress.com/section/lifestyle/health/", NewsType.VIDEO);
        addSource(sourceTypeList, "https://indianexpress.com/section/lifestyle/fitness/", NewsType.IMAGE);
        addSource(sourceTypeList, "https://indianexpress.com/section/lifestyle/art-and-culture/", NewsType.SPORT);
        addSource(sourceTypeList, "https://indianexpress.com/section/lifestyle/workplace/", NewsType.LIFESTYLE);
        addSource(sourceTypeList, "https://indianexpress.com/section/lifestyle/feelings/", NewsType.FUN);
        addSource(sourceTypeList, "https://indianexpress.com/section/lifestyle/life-style/", NewsType.HOUSE);
        addSource(sourceTypeList, "https://indianexpress.com/section/lifestyle/food-wine/", NewsType.ASKANDANSWER);
        addSource(sourceTypeList, "https://indianexpress.com/section/lifestyle/destination-of-the-week/", NewsType.HEALTH);

        // Parenting sub-sections.
        addSource(sourceTypeList, "https://indianexpress.com/section/parenting/nutrition/", NewsType.HOT);
        addSource(sourceTypeList, "https://indianexpress.com/section/parenting/health-fitness/", NewsType.EDUCATION);
        addSource(sourceTypeList, "https://indianexpress.com/section/parenting/family/", NewsType.VIDEO);
        addSource(sourceTypeList, "https://indianexpress.com/section/parenting/learning/", NewsType.IMAGE);
        addSource(sourceTypeList, "https://indianexpress.com/section/parenting/blog/", NewsType.SPORT);
        addSource(sourceTypeList, "https://indianexpress.com/section/parenting/events-things-to-do/", NewsType.LIFESTYLE);

        // Trending sub-sections.
        addSource(sourceTypeList, "https://indianexpress.com/section/trending/trending-globally/", NewsType.HOT);
        addSource(sourceTypeList, "https://indianexpress.com/section/trending/trending-in-india/", NewsType.EDUCATION);
        addSource(sourceTypeList, "https://indianexpress.com/section/trending/viral-videos-trending/", NewsType.VIDEO);
        addSource(sourceTypeList, "https://indianexpress.com/section/trending/this-is-serious/", NewsType.IMAGE);

        // Video channels.
        addSource(sourceTypeList, "https://indianexpress.com/videos/tech-video/", NewsType.HOT);
        addSource(sourceTypeList, "https://indianexpress.com/videos/news-video/", NewsType.EDUCATION);
        addSource(sourceTypeList, "https://indianexpress.com/videos/entertainment-video/", NewsType.VIDEO);
        addSource(sourceTypeList, "https://indianexpress.com/videos/originals-video/", NewsType.IMAGE);
        addSource(sourceTypeList, "https://indianexpress.com/videos/auto-video/", NewsType.SPORT);
        addSource(sourceTypeList, "https://indianexpress.com/videos/the-new-york-times-video/", NewsType.LIFESTYLE);
        addSource(sourceTypeList, "https://indianexpress.com/videos/idea-exchange-video/", NewsType.FUN);
        addSource(sourceTypeList, "https://indianexpress.com/videos/lifestyle-video/", NewsType.HOUSE);
        addSource(sourceTypeList, "https://indianexpress.com/videos/sports-video/", NewsType.ASKANDANSWER);

        // Photo galleries.
        addSource(sourceTypeList, "https://indianexpress.com/photos/entertainment-gallery/", NewsType.HOT);
        addSource(sourceTypeList, "https://indianexpress.com/photos/sports-gallery/", NewsType.EDUCATION);
        addSource(sourceTypeList, "https://indianexpress.com/photos/india-news/", NewsType.VIDEO);
        addSource(sourceTypeList, "https://indianexpress.com/photos/lifestyle-gallery/", NewsType.IMAGE);
        addSource(sourceTypeList, "https://indianexpress.com/photos/trending-gallery/", NewsType.SPORT);
        addSource(sourceTypeList, "https://indianexpress.com/photos/technology-gallery/", NewsType.LIFESTYLE);
        addSource(sourceTypeList, "https://indianexpress.com/photos/photo-archives/", NewsType.FUN);

        // Audio shows.
        addSource(sourceTypeList, "https://indianexpress.com/audio/3-things/", NewsType.HOT);
        addSource(sourceTypeList, "https://indianexpress.com/audio/the-expresso-podcast/", NewsType.EDUCATION);
        addSource(sourceTypeList, "https://indianexpress.com/audio/hear-me-too/", NewsType.VIDEO);
        addSource(sourceTypeList, "https://indianexpress.com/audio/express-elections/", NewsType.IMAGE);
        addSource(sourceTypeList, "https://indianexpress.com/audio/likh-the-process/", NewsType.SPORT);
        addSource(sourceTypeList, "https://indianexpress.com/audio/difficult-conversations-with-your-kids/", NewsType.LIFESTYLE);
        addSource(sourceTypeList, "https://indianexpress.com/audio/what-to-watch-this-weekend/", NewsType.FUN);
        addSource(sourceTypeList, "https://indianexpress.com/audio/the-sandip-roy-show/", NewsType.HOUSE);
        addSource(sourceTypeList, "https://indianexpress.com/audio/talking-books/", NewsType.ASKANDANSWER);

        return sourceTypeList;
    }

    /**
     * Returns the country codes this crawler's news belongs to. A new mutable set is created on
     * every call.
     *
     * @return a set containing only the name of {@code CountryCode.IN}
     */
    @Override
    public Set<String> getCountryCodeSet() {
        Set<String> countryCodeSet = new HashSet<String>();
        countryCodeSet.add(CountryCode.IN.name());
        return countryCodeSet;
    }

    /** Appends one entry page with the given URL and news type to {@code target}. */
    private static void addSource(List<SourceType> target, String url, NewsType newsType) {
        target.add(SourceType.builder().url(url).newsType(newsType).build());
    }

    /** Tells whether {@code text} begins with {@code prefix}, ignoring case and the default locale. */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /**
     * Tells whether {@code host} is the site's domain or a sub-domain of it, ignoring case. A plain
     * substring test is not enough: it would also accept hosts such as
     * {@code notindianexpress.com} or {@code indianexpress.com.example.org}.
     */
    private static boolean isSiteHost(String host) {
        if (host.equalsIgnoreCase(SITE_DOMAIN)) {
            return true;
        }
        String subDomainSuffix = "." + SITE_DOMAIN;
        int suffixStart = host.length() - subDomainSuffix.length();
        // suffixStart > 0 requires at least one character in front of the dot.
        return suffixStart > 0 && host.regionMatches(true, suffixStart, subDomainSuffix, 0, subDomainSuffix.length());
    }
}
